#!/usr/bin/env perl

=head1 NAME

asmbl_cmd_forker.pl

=head1 SYNOPSIS

    USAGE: asmbl_cmd_forker.pl -L asmbl_list_file -f command_list_file -n num_simultaneous_processes

=head1 OPTIONS

########################################################
#
# -L filename of list of asmbls to process
#
# -f filename of commands to run on each asmbl (place the token ASMBL where the asmbl_id should be substituted)
#
# -n number of processes to run simultaneously. (default 1 at a time)
#
# -d debug
# -S verbose
# -h print this and exit.
#
#########################################################


=head1  DESCRIPTION

This script allows you to run a pipeline of commands on a list of asmbl_ids.
For each asmbl_id in the asmbl_list_file (-L), each command in the command_file (-f) is 
run.

If you have multiple CPUs, you can process more than one asmbl_id simultaneously, using -n option.

=head1  INPUT

-L asmbl_list_file:  a list of asmbl_ids in a file, one asmbl_id per line.

-f command_file: a list of commands, one per line.  Put the token 'ASMBL' wherever the asmbl_id should be substituted.  Lines beginning with '#' are ignored.

=head1  OUTPUT

A directory is created called acf.logs.$pid, which contains the stdout and stderr generated by each command and for each asmbl_id.

=head1  CONTACT

    Brian Haas
    bhaas@tigr.org

=begin comment
    this section doesn't show up in viewers but is used for parsing.
    status: active
    keywords: pipeline processing 
=end comment

=cut



use strict;
use warnings;

# Getopt::Std replaces the Perl 4 era "getopts.pl" library, which is no longer
# shipped with Perl (removed from the core distribution in 5.16).  Its
# getopts() fills the same $opt_X package variables that Getopts() used to.
use Getopt::Std;

# Package globals.  The $opt_* switches are populated by getopts(); the other
# names are shared with the remainder of the script.
use vars qw ($opt_f $opt_S $opt_h $opt_n $opt_L $list $opt_d $DEBUG $splink @ids @cmds $cmd @x $asmbl);

# Unbuffered STDOUT, so that progress messages are not duplicated into forked
# children and appear in order with the output of the commands that are run.
$| = 1;

# Switches followed by ':' take a value.  -D and -p are still accepted for
# backward compatibility although nothing in this script reads them.
# getopts() has already reported the offending switch on STDERR when it
# returns false.
getopts('dD:L:Sp:f:hn:') or option_die();

if ($opt_h) {
    die <<_EOH_;

########################################################
#
# -L filename of list of asmbls to process
#
# -f filename of commands to run on each asmbl (place ASMBL where \$asmbl should be)
#
# -n number of processes to run simultaneously.
#
# -d debug
# -S verbose
# -h print this and exit.
#
#########################################################

_EOH_

}

# -L (asmbl list file) is required.
if (defined $opt_L && length $opt_L) {
    $list = $opt_L;
}
else {
    warn "ERROR: -L asmbl_list_file is required\n";
    option_die();
}

# -d switches debug output on, -S switches verbose progress output on.
$DEBUG = $opt_d ? 1 : 0;
my ($SEE) = ($opt_S) ? 1 : 0;

# -n: maximum number of simultaneous children.  Absent (or 0) means one at a
# time, as before; anything else must be a positive integer, because the
# value is used in a numeric comparison that throttles forking.
my $number_processes_allowed = 1;
if ($opt_n) {
    unless ($opt_n =~ /\A[1-9][0-9]*\z/) {
        warn "ERROR: -n must be a positive integer (got '$opt_n')\n";
        option_die();
    }
    $number_processes_allowed = $opt_n;
}

# -f (command file) is required.
my $commands = $opt_f;
unless ($commands) {
    warn "ERROR: -f command_list_file is required\n";
    option_die();
}

# Read in the list of assemblies (-L): one id per line.  All whitespace is
# stripped from each line, so an id cannot contain blanks.  Blank lines are
# stored as empty ids and are filtered out when the ids are processed.
# Three-argument open is used so that the file name is never interpreted as
# an open mode or a pipe.
open(my $list_fh, '<', $list) or die "Cant open $list: $!";
while (my $id_line = <$list_fh>) {
    $id_line =~ s/\s//g;
    print "SPLINK: $id_line \n" if ($DEBUG);
    print "A: $id_line\n" if ($DEBUG);
    push(@ids, $id_line);
}
close($list_fh);


## Read in command lines (-f): one command per line.
print "Reading Commands\n" if $SEE;
# A command file that cannot be read is fatal; otherwise every asmbl would be
# "processed" with an empty command list and no error would be reported.
open(my $cmd_fh, '<', $commands) or die "Cant open $commands: $!";
while (my $cmd_line = <$cmd_fh>) {
    next if $cmd_line =~ /^\#/;        # ignore commented out command lines.
    next unless $cmd_line =~ /\w/;     # ignore lines without any word character.
    print "Read Command: $cmd_line" if $SEE;
    chomp $cmd_line;
    push(@cmds, $cmd_line);
}
close($cmd_fh);

warn "WARNING: no commands found in $commands\n" unless @cmds;

# Number of children launched so far.  Once it reaches the allowed maximum,
# each further launch first waits for one child to exit, which keeps the
# number of running children at or below the limit.
my $counter = 0;

# Per-run log directory, named after the pid of this (parent) process.
my $LOGDIR = "acf.logs.$$";
unless (-d $LOGDIR) {
    mkdir $LOGDIR or die "Cannot create log directory $LOGDIR: $!";
    chmod(0775, $LOGDIR);
}


print "Total number of processes allowed at one time: $number_processes_allowed\n" if $SEE;

foreach my $asmbl_id (@ids) {
    # Skip empty entries (blank lines in the list).  length() is tested
    # rather than truth so that an id of "0" is still processed.
    next unless defined $asmbl_id && length $asmbl_id;

    print "\n------- Processing asmbl: $asmbl_id\n";

    if ($counter >= $number_processes_allowed) {
        print "\tWaiting for a child to exit\n\n" if $SEE;
        wait();
    }
    $counter++;

    my $pid = fork();
    unless (defined $pid) {
        # fork() failed.  Without this check the parent would fall into the
        # child branch below, run the commands itself and then exit.
        my $fork_error = $!;
        while (wait() > 0) {}    # let the children already running finish
        die "Cannot fork for asmbl $asmbl_id: $fork_error\n";
    }
    next if $pid;    # parent: go on to the next asmbl

    ## ---- child process from here on ----
    print "\nRunning process $counter under child\n\n" if $SEE;

    my $output_log = "$LOGDIR/$asmbl_id.stdout";
    my $errlog     = "$LOGDIR/$asmbl_id.errlog";
    my $retlog     = "$LOGDIR/$asmbl_id.ret";

    # Redirect the child's STDOUT and STDERR into the per-asmbl log files;
    # the commands started with system() inherit these handles.
    open(STDOUT, '>', $output_log) or die "Cannot write $output_log: $!";
    open(STDERR, '>', $errlog)     or die "Cannot write $errlog: $!";
    # Keep STDOUT unbuffered so the "CMD" lines stay in order with the
    # output written by the commands themselves.
    select((select(STDOUT), $| = 1)[0]);
    open(my $ret_fh, '>', $retlog) or die "Cannot write $retlog: $!";

    my $cmd_num = 0;
    foreach my $template (@cmds) {
        $cmd_num++;
        # Substitute the id into a copy, leaving the template list untouched.
        (my $command = $template) =~ s/ASMBL/$asmbl_id/g;
        print "CMD (asmbl: $asmbl_id, cmd_num: $cmd_num): $command\n";

        # The raw value returned by system() is recorded, i.e. the wait
        # status, not the exit code (-1 means the command could not start).
        my $ret = system $command;
        print $ret_fh "ret($ret)\t$command\n";
    }
    close($ret_fh) or warn "Error closing $retlog: $!";

    # remove empty files
    unless (-s $output_log) { unlink $output_log; }
    unless (-s $errlog)     { unlink $errlog; }
    exit(0);
}

print "End of Parent Process\nWaiting for children to exit\n" if $SEE;

# wait() returns -1 once there are no children left.
while (wait() > 0) {}

# option_die([$reason])
#
# Abort the script because of a command-line problem.  Dies with a short
# usage summary instead of a bare "Died at ..." message, so the user can see
# how the script is meant to be called.
#
#   $reason - optional text describing what was wrong; when given it is
#             printed on an "ERROR:" line ahead of the usage summary.
#
# Never returns.  The message ends in a newline so Perl does not append the
# script name and line number to it.
sub option_die {
    my ($reason) = @_;

    my $message = '';
    $message .= "ERROR: $reason\n" if defined $reason && length $reason;
    $message .= "USAGE: asmbl_cmd_forker.pl -L asmbl_list_file -f command_list_file -n num_simultaneous_processes\n";
    $message .= "Run with -h for the full list of options.\n";

    die $message;
}


